******************************************************************************
*This file cleans the location of institutions at the CZ level from the IPEDS*
******************************************************************************

*global experiment /Users/giuseppedigiacomo/Dropbox/Education and Local Labor Market Shocks/Analysis/Data/IPEDS/Builders/zip-codes aggregation/

**** ZIP-to-county crosswalk
* Load the ZIP/county crosswalk.
use "$xwalk_geo/ZIP_COUNTY_032010.dta", clear

* Keep the contiguous states only: drop Alaska (2), Hawaii (15) and the
* codes 60-78 (Puerto Rico and the other territories).
drop if inlist(fips, 2, 15) | inrange(fips, 60, 78)

* Harmonise county codes:
*   4075  -> 4013  : county with a wrong fips code (Maricopa, AZ)
*   12086 -> 12025 : county code that changed over time
replace ctyfips = 4013  if ctyfips == 4075
replace ctyfips = 12025 if ctyfips == 12086

codebook zip // there are duplicates: the same zip appears on several rows

* Working copy of the crosswalk ratio (summed by zip-czone in the collapse below)
gen ourRatio = TOT_RATIO
	
* Helper file used to fix the zipcodes: one row per zip with
*   dup = 0              when the zip appears once in the crosswalk
*   dup = number of rows when the zip appears several times
* (gives an idea of how many times an IPEDS unitid should be expanded)
preserve
	quietly bysort zip: gen dup = cond(_N == 1, 0, _N)
	by zip: keep if _n == _N // a single row per zip carries all the information
	keep zip dup
	tab dup
	save "$clean_data_education/duplicates.dta", replace
restore

* Attach the commuting zone (czone) of each county, then the czone attributes.
* keep(1 3): unmatched crosswalk rows are kept, unmatched using rows are dropped.
merge m:1 ctyfips using "$xwalk_geo/ctyfips_czone.dta", keep(1 3) nogenerate
merge m:1 czone using "$list/czones_list.dta", keep(1 3) nogenerate

* Follow Dorn's crosswalk for county 8014: fill in by hand every attribute
* that is still missing after the two merges.
	replace czone      = 28900             if ctyfips == 8014 & missing(czone)
	replace statefip   = 8                 if ctyfips == 8014 & missing(statefip)
	replace division   = 8                 if ctyfips == 8014 & missing(division)
	replace region     = 4                 if ctyfips == 8014 & missing(region)
	replace name_czone = "Denver city, CO" if ctyfips == 8014 & missing(name_czone)

* One row per zip-czone pair: rows that end up in the same czone are merged,
* summing their ratios and keeping the first value of the other variables.
collapse (sum) ourRatio (first) fips statefip region division state name_czone, by(zip czone)

save "$xwalk_geo/ZIP_CZ.dta", replace

**** Institution locations from IPEDS
use "$clean_data_education/locationDatabase.dta", clear

* Set aside the unitid-year rows located outside the mainland
* (Alaska = 2, Hawaii = 15, codes 60-78 = Puerto Rico etc.) and flag them.
preserve
	keep if inlist(fips, 2, 15) | inrange(fips, 60, 78)
	keep unitid year fips
	gen nonMainland = 1 // every row that survived the filter is non-mainland
	label variable nonMainland "one if nonMainland"
	save "$clean_data_education/unitid_fips.dta", replace
restore

* ...and remove those same rows from the working data
drop if inlist(fips, 2, 15) | inrange(fips, 60, 78)

* Homogenize city names: every known variant (abbreviation, typo, stray
* punctuation) is mapped to a single spelling. Letter case of the target is
* not uniform. The statements are applied in order.

* --- "Fort ..." cities
replace city = "Fort Lauderdale"   if inlist(city, "FT LAUDERDALE", "Ft. Lauderdale", "FT. LAUDERDALE")
replace city = "Fort Myers"        if inlist(city, "FT MYERS", "FT. MYERS")
replace city = "Fort Valley"       if inlist(city, "FT VALLEY", "FORT VALLEY")
replace city = "Fort Wayne"        if inlist(city, "FT WAYNE", "FORT WAYNE", "Ft. Wayne")
replace city = "Fort Madison"      if inlist(city, "FT MADISON", "FORT MADISON")
replace city = "Fort Dodge"        if inlist(city, "FT DODGE", "FORT DODGE")
replace city = "Fort Scott"        if inlist(city, "FT SCOTT", "FORT SCOTT")
* NOTE(review): the target spelling below lacks an "e" ("Leavnworth"); kept as is
replace city = "Fort Leavnworth"   if inlist(city, "FT LEAVNWORTH", "FORT LEAVNWORTH", "FT LEAVENWORTH", "FORT LEAVENWORTH")
replace city = "Fort Mitchell"     if inlist(city, "FT MITCHELL", "FORT MITCHELL")
replace city = "Fort Kent"         if inlist(city, "FT KENT", "FORT KENT")
replace city = "Fort Totten"       if inlist(city, "FT TOTTEN", "FORT TOTTEN")
replace city = "Fort Yates"        if inlist(city, "FT YATES", "FORT YATES")
replace city = "Fort Worth"        if inlist(city, "FT WORTH", "FORT WORTH", "FT. WORTH", "Ft Worth")
replace city = "Fort Collins"      if inlist(city, "FT COLLINS", "FORT COLLINS")
replace city = "Fort Washington"   if inlist(city, "FT WASHINGTON", "FORT WASHINGTON")
replace city = "Fort Cobb"         if inlist(city, "FT COBB", "FORT COBB")
replace city = "Fort Gordon"       if inlist(city, "FT GORDON", "FORT GORDON")
replace city = "Fort McPherson"    if inlist(city, "FT MCPHERSON", "FORT MCPHERSON", "FORT MC PHERSON")
replace city = "Fort Smith"        if inlist(city, "FT SMITH", "FORT SMITH", "Ft Smith", "FT.SMITH", "FT. SMITH")
replace city = "Fort Walton Beach" if inlist(city, "FT WALTON BEACH", "FORT WALTON BEACH")
replace city = "Fort Pierce"       if inlist(city, "FT PIERCE", "FORT PIERCE")

* --- other abbreviations, typos and stray punctuation
replace city = "Salt Lake City" if inlist(city, "SALT LAKE CITY", "SALT LAKE CY")
replace city = "Alameda" if inlist(city, "ALAMEDA", "ALAMEDA,")
replace city = "Albuquerque" if inlist(city, "ALBUQUERQUE", "ALBUQUERUE", "Albuequerque")
replace city = "Alexander City" if inlist(city, "ALEXANDER", "ALEXANDER CITY")
replace city = "Alhambra" if inlist(city, "ALHAMBRA", "ALHAMBRA,")
replace city = "Altadena" if inlist(city, "ALTADENA", "ALTADENA,")
replace city = "ANAHEIM" if inlist(city, "ANAHEIM,", "ANAHEM")
replace city = "ANNANDALE ON HUDSON" if city == "ANNANDLE-HDSN"
replace city = "APPLETON" if city == "APPLETON,"
replace city = "ARLINGTON HEIGHTS" if inlist(city, "ARLINGTON HGTS", "ARLINGTON HTS")
replace city = "ATASCADERO" if city == "ATASCADRO"
replace city = "AUBURN UNIVERSITY" if city == "AUBURN UNIV"
replace city = "STATE UNIVERSITY" if city == "STATE UNIV"
replace city = "AURORA" if city == "AUROA"
replace city = "BERKELEY" if city == "BERKELEY,"
replace city = "NORTH PALM BEACH" if inlist(city, "NORTH PALM BEAC", "N. PALM BEACH")
replace city = "BIRMINGHAM" if city == "MOUNTAIN BROOK"
replace city = "BULLHEAD CITY" if inlist(city, "BULL HEAD CITY", "BULL HEAD")
replace city = "North Little Rock" if city == "N LITTLE ROCK"
replace city = "HOT SPRINGS" if city == "HOT SPRINGS NP"
* the next three city fields do not contain a city name at all
replace city = "Solon" if city == "6864 Cochran Rd"
replace city = "RIPLEY" if city == "6122574010"
replace city = "SALEM" if city == "1288M"
replace city = "EDEN PRAIRIE" if city == "EDEN PRARIE"
replace city = "ALBANY" if city == "ALABANY"
replace city = "Altamonte Springs" if inlist(city, "Altamonte", "ALTAMONTE SPR", "ALTAMONTE SPRGS", "ALTAMONTE SPRIN")
* state-specific fixes (fips restricts the state)
replace city = "ABINGDON" if fips == 51 & city == "ABINGTON"
replace city = "PALO ALTO" if fips == 6 & city == "PALO ALO"
replace city = "NATIONAL CITY" if city == "NATIONAL"
replace city = "SN LUIS OBISPO" if city == "SN LUIS OBSPO"
replace city = "BERRIEN SPRINGS" if city == "BERRIEN SPG"
replace city = "BLOOMFIELD HILLS" if inlist(city, "BLOOMFIELD HILL", "BLOOMFLD HLS", "BLOOMFIELD HTS")
replace city = "COLORADO SPRINGS" if inlist(city, "COLORADO SPG", "COLORADO SPGS", "COLORADO SPR", "COLORADO SPRING")
replace city = "ELIZABETH CITY" if inlist(city, "ELIZABETH CIT", "ELIZABETH CY")
replace city = "EUFAULA" if city == "EUGAULA"
replace city = "GLENWOOD SPRINGS" if inlist(city, "GLENWOOD SPG", "GLENWOOD SPGS")
replace city = "INCLINE VILLAGE" if city == "INCLINE VLG"
replace city = "LITTLETON" if city == "LITTLETOWN"
replace city = "NEW PORT RICHEY" if city == "NEW PORT RICHIE"
replace city = "STERLING HEIGHTS" if inlist(city, "STERLING HGTS", "STERLING HTS")
replace city = "UNIVERSITY CENTER" if inlist(city, "UNIVERSITY CENT", "UNIV CTR")
replace city = "W PALM BEACH" if city == "W PALM BCH"
replace city = "SOUTH LAKE TAHOE" if city == "S LAKE TAHOE"
replace city = "COUNCIL BLUFFS" if inlist(city, "COUNCIL BLUFF", "COUNCIL BLF")
replace city = "MARTINSVILLE" if city == "MARTINSIDE"
replace city = "WELLESLEY" if inlist(city, "WETLESLEY", "Wellesley")

* --- city imposed for specific institutions, whatever was recorded
replace city = "Dayton"       if unitid == 453419
replace city = "Cleveland"    if unitid == 452364
replace city = "Columbus"     if unitid == 452285
replace city = "West Chester" if unitid == 452799
replace city = "Purcellville" if unitid == 451927
replace city = "LITTLE ROCK"  if unitid == 485050
replace city = "Omaha"        if unitid == 485227
replace city = "tempe"        if unitid == 105516

replace city = "mount gay" if city == "MT GAY"
replace city = "NEWBURGH" if city == "NEWBURGH\"

* Manual corrections of the raw (string) IPEDS zip, applied in order.

* --- zip imposed for specific institutions, whatever was recorded
replace zip = "85282-2371" if unitid == 105516
replace zip = "43207"      if unitid == 452285
replace zip = "45402"      if unitid == 453419
* NOTE(review): 4-character value for unitid 452364 - confirm the intended zip
replace zip = "5309"       if unitid == 452364
replace zip = "45069"      if unitid == 452799
replace zip = "20132"      if unitid == 451927
replace zip = "72205"      if unitid == 485050
replace zip = "35570" if zip == "33570" & city == "HAMILTON"
replace zip = "02474" if zip == "02174" & unitid == 165839
replace zip = "68124"      if unitid == 485227

* --- placeholder values ("00000-0000", "0", ".", "&") replaced by a real zip
replace zip = "39401" if zip == "00000-0000" & unitid == 438674
replace zip = "61244" if zip == "0" & unitid == 143695
replace zip = "98310" if zip == "." & unitid == 234906
replace zip = "89431" if zip == "." & unitid == 368708
replace zip = "80220" if zip == "." & unitid == 438559
replace zip = "80112" if zip == "." & unitid == 438568
replace zip = "80301" if zip == "&" & unitid == 381884

* --- wrong or mangled zips
replace zip = "03053" if zip == "30530" & fips == 33
replace zip = "03220" if zip == "32200" & unitid == 182704
replace zip = "06412" if zip == "64120" & unitid == 128799
replace zip = "038671905" if zip == "386710905" & unitid == 363068
replace zip = "98584" if zip == "95721" & unitid == 235705
replace zip = "35150" if zip == "36150" & unitid == 366872
replace zip = "925391667" if zip != "925391667" & unitid == 117159
replace zip = "01742" if zip == "2173" & city == "CONCORD" & inlist(unitid, 165635, 165909)
replace zip = "34601" if zip == "33512" & unitid == 137616
replace zip = "48329" if zip == "48020" & unitid == 170912
replace zip = "48334" if zip == "48018" & unitid == 260503
replace zip = "92530" if zip == "92330" & inlist(unitid, 368753, 114080)
replace zip = "65706" if zip == "657092399" & unitid == 245412
replace zip = "63366" if zip == "63166" & unitid == 179858
replace zip = "06525" if zip == "65250" & unitid == 129604
replace zip = "08854" if fips == 34 & city == "Piscataway" // recorded zip is wrong
replace zip = "92553" if fips == 6 & city == "SUNNYMEAD" // recorded zip is wrong
replace zip = "80222" if zip == "2225335" & unitid == 247524

* --- state code (fips) corrected for three institutions, identified by
*     unitid together with the recorded fips and zip
replace fips = 44 if zip == "2891"  & fips == 9  & unitid == 128805
replace fips = 47 if zip == "38125" & fips == 28 & unitid == 417789
replace fips = 26 if zip == "48047" & fips == 24 & unitid == 452072

* Get info from other years when missing: within an institution, ordered by
* year, a blank (" ") city or zip takes the value of the previous row,
* provided the fips is the same on both rows.
bysort unitid (year): replace city = city[_n-1] if city == " " & fips == fips[_n-1]
by unitid: replace zip = zip[_n-1] if zip == " " & fips == fips[_n-1]

* Normalised city key: lower case with blanks, commas and periods removed
gen cityLower = lower(city)
replace cityLower = subinstr(subinstr(subinstr(cityLower, " ", "", .), ",", "", .), ".", "", .)

* Rebuild a numeric 5-digit zip from the IPEDS string
gen zipLength = length(zip)
tab zipLength
gen zipIPEDS = zip // copy of the zip as it stands before this rebuild
gen zero = 0 // not used below in this section; kept so the variable list is unchanged
* zips of length 4 or 8 get a "0" added in front, all the others are left as they are
gen zipOriginal = cond(inlist(zipLength, 4, 8), "0" + zip, zip)
drop zip zipLength
gen zip = substr(zipOriginal, 1, 5)
label variable zip "zip = substr(zipOriginal,1,5)"
destring zip, replace

* Fixes made by looking at the zipcode file.
* Each line remaps one (or several) recorded zip(s) of a given city to a zip
* that exists in the crosswalk. The trailing comment gives the reason:
*   "P.O."  = P.O.-box-only zip, replaced by the closest regular one
*   "typo"  = probable typo in the recorded zip
*   "old"   = the zipcode probably doesn't exist anymore
*   "?"     = not sure what happened
replace zip = 1085  if zip == 1086 & cityLower == "westfield"
replace zip = 1104  if zip == 1102 & cityLower == "springfield" // old
replace zip = 1608  if zip == 1615 & cityLower == "worcester" // old
replace zip = 2446  if zip == 2146 & cityLower == "brookline" // old
replace zip = 2453  if inlist(zip, 2154, 2254, 2454) & cityLower == "waltham" // typo
replace zip = 62703 if inlist(zip, 62194, 62708, 62769) & cityLower == "springfield" // old
replace zip = 62704 if zip == 62794 & cityLower == "springfield" // typo
replace zip = 2482  if inlist(zip, 2157, 2181, 2457) & cityLower == "wellesley" // ?
replace zip = 2481  if zip == 2181 & cityLower == "wellesleyhills" // ?
replace zip = 2472  if zip == 2172 & cityLower == "watertown" // typo
replace zip = 2301  if zip == 2401 & cityLower == "brockton" // typo
replace zip = 2302  if zip == 2402 & cityLower == "brockton" // typo
replace zip = 24011 if inlist(zip, 24031, 24033) & cityLower == "roanoke" // the zipcode exists but is very small, closest one used
replace zip = 24018 if zip == 24008 & cityLower == "roanoke" // typo
replace zip = 2458  if zip == 2158 & cityLower == "newton" // typo
replace zip = 2459  if zip == 2159 & cityLower == "newtoncentre" // typo
replace zip = 24210 if zip == 24212 & cityLower == "abingdon" // typo
replace zip = 29801 if zip == 29802 & cityLower == "aiken" // typo
replace zip = 31701 if inlist(zip, 31708, 31702) & cityLower == "albany" // the zipcode exists but is very small, closest one used
replace zip = 12203 if zip == 12230 & cityLower == "albany" // typo
replace zip = 87105 if zip == 87195 & cityLower == "albuquerque" // typo
replace zip = 87102 if zip == 87184 & cityLower == "albuquerque" // typo
replace zip = 35010 if zip == 35011 & cityLower == "alexandercity" // typo
replace zip = 18109 if zip == 18001 & cityLower == "allentown" // P.O.
replace zip = 18102 if zip == 18105 & cityLower == "allentown" // P.O.
replace zip = 60803 if zip == 60658 & cityLower == "alsip" // ?
replace zip = 16602 if zip == 16603 & cityLower == "altoona" // P.O.
replace zip = 79103 if zip == 79114 & cityLower == "amarillo" // P.O.
replace zip = 46013 if zip == 46014 & cityLower == "anderson" // typo
replace zip = 21401 if zip == 21404 & cityLower == "annapolis" // P.O.
replace zip = 48104 if inlist(zip, 48016, 48106, 48107) & cityLower == "annarbor" // P.O.
replace zip = 36201 if zip == 36202 & cityLower == "anniston" // P.O.
replace zip = 54913 if zip == 54912 & cityLower == "appleton" // P.O.
replace zip = 2474  if zip == 2174 & cityLower == "arlington" // P.O.
replace zip = 20147 if zip == 22011 & cityLower == "ashburn" // typo
replace zip = 27203 if zip == 27204 & cityLower == "asheboro" // P.O.
replace zip = 28801 if zip == 28816 & cityLower == "asheville" // P.O.
replace zip = 30601 if zip == 30610 & cityLower == "athens" // typo
replace zip = 37303 if zip == 37371 & cityLower == "athens" // P.O.
replace zip = 30334 if zip == 30302 & cityLower == "atlanta" // P.O.
replace zip = 36502 if zip == 36504 & cityLower == "atmore" // P.O.
replace zip = 30901 if zip == 30910 & cityLower == "augusta" // typo
replace zip = 39817 if zip == 31717 & cityLower == "bainbridge" // ?
replace zip = 72501 if zip == 72503 & cityLower == "batesville" // P.O.
replace zip = 29902 if zip == 29901 & cityLower == "beaufort" // P.O.
replace zip = 61701 if zip == 61702 & cityLower == "bloomington" // P.O.
replace zip = 72315 if zip == 72316 & cityLower == "blytheville" // P.O.
replace zip = 79007 if zip == 79008 & cityLower == "borger" // P.O.
replace zip = 92821 if zip == 92621 & cityLower == "brea" // typo
replace zip = 93923 if zip == 93921 & cityLower == "carmel" // P.O.
replace zip = 85122 if zip == 85222 & cityLower == "casagrande" // typo
replace zip = 28280 if zip == 28232 & cityLower == "charlotte" // P.O.
replace zip = 63017 if zip == 63006 & cityLower == "chesterfield" // P.O.
replace zip = 2467  if zip == 2167 & cityLower == "chestnuthill" // typo
replace zip = 60804 if zip == 60650 & cityLower == "cicero" // ?
replace zip = 33764 if inlist(zip, 34624, 34625, 34620, 34621, 34622, 33519) & cityLower == "clearwater" // ?
replace zip = 37311 if zip == 37320 & cityLower == "cleveland" // P.O.
replace zip = 28328 if zip == 28329 & cityLower == "clinton" // P.O.
replace zip = 52732 if zip == 52733 & cityLower == "clinton" // P.O.
replace zip = 29220 if zip == 29202 & cityLower == "columbia" // typo
replace zip = 38401 if zip == 38402 & cityLower == "columbia" // P.O.
* FIX: the target used to be 32901, which is not a "closest" zip to 31995
* (it starts with a different 3-digit prefix). 31901 is the zip already used
* for the other Columbus P.O. remap (31993 -> 31901) further down this list.
replace zip = 31901 if zip == 31995 & cityLower == "columbus" // P.O.
replace zip = 75428 if zip == 75429 & cityLower == "commerce" // P.O.
replace zip = 30013 if zip == 30208 & cityLower == "conyers" // ?
replace zip = 85128 if zip == 85228 & cityLower == "coolidge" // typo
replace zip = 92236 if zip == 91720 & cityLower == "corona" // ?
* Zip fixes, continued. Tags: "P.O" = P.O.-box-only zip, "ERROR"/"typo" =
* wrong zip recorded, "?" = not sure what happened.
* Numeric literals are written without a leading zero (zip is numeric here).
replace zip = 37620 if zip == 37625 & cityLower == "bristol" // P.O
replace zip = 39051 if zip == 39050 & cityLower == "carthage" // ERROR
replace zip = 39501 if zip == 39506 & cityLower == "gulfport" // P.O
* FIX: this line used to write the 6-digit value 447716, which a later line
* (447716 -> 47713) then repaired; the final value is now assigned directly.
replace zip = 47713 if zip == 47706 & cityLower == "evansville" // P.O
replace zip = 51503 if zip == 51502 & cityLower == "councilbluffs" // P.O
* FIX: same pattern as above (520013 was later repaired to 52003).
replace zip = 52003 if zip == 52004 & cityLower == "dubuque" // P.O
replace zip = 70560 if zip == 70562 & cityLower == "newiberia" // P.O
replace zip = 71055 if zip == 71058 & cityLower == "minden" // P.O
replace zip = 75501 if zip == 75502 & cityLower == "texarkana" // ERROR
replace zip = 75601 if zip == 75607 & cityLower == "longview" // P.O
replace zip = 76067 if zip == 76068 & cityLower == "mineralwells" // P.O
replace zip = 76541 if zip == 76540 & cityLower == "killeen" // P.O
replace zip = 88030 if zip == 88031 & cityLower == "deming" // P.O
replace zip = 90503 if zip == 90509 & cityLower == "torrance" // P.O
replace zip = 91702 if zip == 91700 & cityLower == "azusa" // ERROR
replace zip = 91932 if zip == 92032 & cityLower == "imperialbeach" // ERROR
replace zip = 91941 if zip == 92043 & cityLower == "lamesa" // ERROR
replace zip = 91976 if zip == 92077 & cityLower == "springvalley" // ERROR
replace zip = 92589 if zip == 92390 & cityLower == "temecula" // ERROR
replace zip = 92262 if zip == 92664 & cityLower == "palmsprings" // ERROR
replace zip = 96150 if zip == 95705 & cityLower == "southlaketahoe" // ERROR
replace zip = 96150 if zip == 96151 & cityLower == "southlaketahoe" // P.O
replace zip = 98177 if zip == 98160 & cityLower == "richmondbeach" // P.O
replace zip = 1754  if zip == 2178 & cityLower == "belmont" // P.O, but also the city is wrong (should be Maynard)
replace zip = 35020 if zip == 35021 & cityLower == "bessemer" // P.O
replace zip = 33134 if zip == 33124 & cityLower == "coralgables" // typo
replace zip = 38555 if zip == 38557 & cityLower == "crossville" // typo
replace zip = 30012 if zip == 30207 & cityLower == "conyers" // ?
replace zip = 70578 if zip == 70527 & cityLower == "crowley" // P.O
replace zip = 39840 if zip == 31740 & cityLower == "cuthbert" // ?
replace zip = 32114 if zip == 32115 & cityLower == "daytonabeach" // P.O
replace zip = 32114 if zip == 32014 & cityLower == "daytonabeach" // typo
replace zip = 35601 if zip == 35609 & cityLower == "decatur" // P.O
replace zip = 93215 if zip == 93216 & cityLower == "delano" // P.O
replace zip = 65459 if zip == 65419 & cityLower == "dixon" // typo
replace zip = 28334 if zip == 28335 & cityLower == "dunn" // P.O
replace zip = 71730 if zip == 71731 & cityLower == "eldorado" // P.O
replace zip = 19127 if zip == 19117 & cityLower == "elkinspark" // typo
replace zip = 60707 if zip == 60635 & cityLower == "elmwoodpark" // ?
replace zip = 79901 if zip == 79951 & cityLower == "elpaso" // P.O
replace zip = 36330 if zip == 36331 & cityLower == "enterprise" // P.O
* FIX: this rule was keyed on "elkinspark" (copy-paste from the 19117 rule
* above); zip 06432 -> 06824 belongs with the Fairfield rule that follows.
replace zip = 6824  if zip == 6432 & cityLower == "fairfield" // typo
replace zip = 6825  if zip == 6430 & cityLower == "fairfield" // typo
* Zip fixes, continued. Tags: "P.O" = P.O.-box-only zip, "typo" = probable
* typo in the recorded zip, "?" = not sure what happened.
* Numeric literals are written without a leading zero (zip is numeric here).
replace zip = 35630 if zip == 35631 & cityLower == "florence" // P.O
replace zip = 30297 if zip == 30050 & cityLower == "forestpark" // ?
replace zip = 34950 if zip == 33450 & cityLower == "fortpierce" // typo
replace zip = 92831 if inlist(zip, 92634, 92631) & cityLower == "fullerton" // ?
replace zip = 92835 if zip == 92635 & cityLower == "fullerton" // typo
replace zip = 35901 if zip == 35902 & cityLower == "gadsden" // P.O
replace zip = 95540 if zip == 95440 & cityLower == "garberville" // typo
replace zip = 92843 if zip == 92643 & cityLower == "gardengrove" // typo
replace zip = 92841 if zip == 92641 & cityLower == "gardengrove" // typo
replace zip = 92840 if zip == 92640 & cityLower == "gardengrove" // typo
* FIX: the rule only matched the spelling "glastonia"; the correctly spelled
* "gastonia" is now accepted as well (the old spelling still matches).
replace zip = 28052 if zip == 28053 & inlist(cityLower, "gastonia", "glastonia") // P.O
replace zip = 27534 if zip == 27533 & cityLower == "goldsboro" // P.O
replace zip = 68801 if zip == 68802 & cityLower == "grandisland" // P.O
replace zip = 60457 if zip == 60547 & cityLower == "hickoryhills" // typo
replace zip = 71801 if zip == 71802 & cityLower == "hope" // P.O
replace zip = 89451 if zip == 89450 & cityLower == "inclinevillage" // P.O
replace zip = 64050 if zip == 64051 & cityLower == "independence" // P.O
replace zip = 34450 if zip == 32650 & cityLower == "inverness" // typo
replace zip = 92618 if zip == 92718 & cityLower == "irvine" // typo
replace zip = 92620 if zip == 92720 & cityLower == "irvine" // typo
replace zip = 92615 if zip == 92715 & cityLower == "irvine" // typo
replace zip = 92617 if zip == 92717 & cityLower == "irvine" // typo
replace zip = 39201 if zip == 39205 & cityLower == "jackson" // P.O
replace zip = 48933 if zip == 48901 & cityLower == "jackson" // P.O, but also the city is wrong (should be Lansing)
replace zip = 53545 if zip == 53547 & cityLower == "janesville" // P.O
replace zip = 28501 if zip == 28502 & cityLower == "kinston" // P.O
replace zip = 34743 if zip == 32743 & cityLower == "kissimmee" // typo
replace zip = 34742 if zip == 32742 & cityLower == "kissimmee" // typo
replace zip = 34741 if zip == 32741 & cityLower == "kissimmee" // typo
replace zip = 91941 if zip == 92041 & cityLower == "lamesa" // typo
replace zip = 91942 if zip == 92042 & cityLower == "lamesa" // typo
replace zip = 29720 if zip == 29721 & cityLower == "lancaster" // P.O
replace zip = 30046 if zip == 30246 & cityLower == "lawrenceville" // typo
replace zip = 30045 if zip == 30245 & cityLower == "lawrenceville" // typo
replace zip = 37087 if zip == 37088 & cityLower == "lebanon" // P.O
replace zip = 20175 if zip == 22075 & cityLower == "leesburg" // typo
replace zip = 4240  if zip == 4243 & cityLower == "lewiston" // P.O
replace zip = 1754  if zip == 2173 & cityLower == "lexington" // P.O, but also the city is wrong (should be Maynard)
replace zip = 27292 if zip == 27293 & cityLower == "lexington" // P.O
replace zip = 30047 if zip == 30247 & cityLower == "lilburn" // typo
replace zip = 19341 if zip == 19353 & cityLower == "lionville" // P.O, but also the city is wrong (should be Exton)
replace zip = 93402 if zip == 93412 & cityLower == "lososos" // P.O
replace zip = 28360 if zip == 28359 & cityLower == "lumberton" // P.O
replace zip = 20110 if zip == 22110 & cityLower == "manassas" // typo
replace zip = 20111 if zip == 22111 & cityLower == "manassaspark" // typo
replace zip = 6040  if zip == 6045 & cityLower == "manchester" // P.O
replace zip = 71655 if zip == 71656 & cityLower == "monticello" // P.O
replace zip = 94556 if zip == 94575 & cityLower == "moraga" // P.O
replace zip = 70380 if zip == 70381 & cityLower == "morgancity" // P.O
replace zip = 31768 if zip == 31776 & cityLower == "moultrie" // P.O
replace zip = 25601 if zip == 25637 & cityLower == "mountgay" // P.O
replace zip = 75455 if zip == 75456 & cityLower == "mountpleasant" // P.O
replace zip = 35661 if zip == 35662 & cityLower == "muscleshoals" // P.O
replace zip = 29577 if zip == 29578 & cityLower == "myrtlebeach" // P.O
replace zip = 60563 if zip == 60566 & cityLower == "naperville" // P.O
replace zip = 34112 if zip == 33962 & cityLower == "naples" // ?
replace zip = 91950 if zip == 92050 & cityLower == "nationalcity" // typo
replace zip = 47630 if zip == 47629 & cityLower == "newburgh" // P.O
replace zip = 91321 if zip == 91322 & cityLower == "newhall" // P.O
replace zip = 34652 if zip == 33552 & cityLower == "newportrichey" // typo
replace zip = 60714 if zip == 60648 & cityLower == "niles" // ?
replace zip = 92236 if zip == 91760 & cityLower == "norco" // ?
replace zip = 68701 if zip == 68702 & cityLower == "norfolk" // P.O
replace zip = 34474 if zip == 32678 & cityLower == "ocala" // ?
replace zip = 79761 if zip == 79760 & cityLower == "odessa" // P.O
replace zip = 36804 if zip == 36803 & cityLower == "opelika" // P.O
replace zip = 92866 if zip == 92666 & cityLower == "orange" // typo
replace zip = 92865 if zip == 92665 & cityLower == "orange" // typo
replace zip = 92869 if zip == 92669 & cityLower == "orange" // typo
replace zip = 92867 if zip == 92667 & cityLower == "orange" // typo
replace zip = 92868 if zip == 92668 & cityLower == "orange" // typo
replace zip = 32174 if zip == 32074 & cityLower == "ormondbeach" // typo
replace zip = 30054 if zip == 30267 & cityLower == "oxford" // ?
replace zip = 36360 if zip == 36361 & cityLower == "ozark" // P.O
replace zip = 20197 if zip == 22129 & cityLower == "paeoniansprings" // ?
replace zip = 32177 if zip == 32077 & cityLower == "palatka" // typo
replace zip = 7505  if zip == 7509 & cityLower == "paterson" // P.O
replace zip = 33782 if inlist(zip, 34666, 33666) & cityLower == "pinellaspark" // ?
replace zip = 33781 if zip == 34665 & cityLower == "pinellaspark" // ?
replace zip = 92870 if zip == 92670 & cityLower == "placentia" // typo
replace zip = 7060  if zip == 7061 & cityLower == "plainfield" // P.O
replace zip = 70764 if zip == 70765 & cityLower == "plaquemine" // P.O
replace zip = 4101  if zip == 4104 & cityLower == "portland" // P.O
replace zip = 74361 if zip == 74362 & cityLower == "pryor" // P.O
replace zip = 32435 if zip == 32454 & cityLower == "ptwashington" // ?
replace zip = 19601 if zip == 19603 & cityLower == "reading" // P.O
replace zip = 7450  if zip == 7451 & cityLower == "ridgewood" // P.O
replace zip = 82901 if zip == 82902 & cityLower == "rocksprings" // P.O
replace zip = 11570 if zip == 11571 & cityLower == "rockvillecentre" // P.O
replace zip = 3869  if zip == 38690 & cityLower == "rollingford" // typo
replace zip = 92583 if zip == 92383 & cityLower == "sanjacinto" // typo
replace zip = 94578 if zip == 95478 & cityLower == "sanleandro" // typo
replace zip = 87535 if zip == 87504 & cityLower == "santafe" // P.O
replace zip = 33772 if zip == 34642 & cityLower == "seminole" // ?
replace zip = 57103 if zip == 57101 & cityLower == "siouxfalls" // P.O
replace zip = 96150 if zip == 95702 & cityLower == "southlaketahoe" // typo + P.O
replace zip = 91977 if zip == 91976 & cityLower == "springvalley" // P.O
replace zip = 25632 if zip == 25646 & cityLower == "stollings" // P.O
replace zip = 6615  if zip == 6497 & cityLower == "stratford" // ?
replace zip = 7901  if zip == 7902 & cityLower == "summit" // P.O
replace zip = 71282 if zip == 71284 & cityLower == "tallulah" // P.O
replace zip = 92590 if zip == 92589 & cityLower == "temecula" // P.O
replace zip = 70301 if inlist(zip, 70310, 70302) & cityLower == "thibodaux" // P.O
replace zip = 43614 if zip == 43624 & cityLower == "toledo" // typo
replace zip = 92780 if zip == 92680 & cityLower == "tustin" // typo
replace zip = 49648 if zip == 48087 & cityLower == "utica" // ?
replace zip = 8360  if zip == 8362 & cityLower == "vineland" // P.O
replace zip = 27587 if zip == 27588 & cityLower == "wakeforest" // P.O
replace zip = 23415 if zip == 23483 & cityLower == "wattsville" // P.O
replace zip = 60085 if zip == 60079 & cityLower == "waukegan" // P.O
replace zip = 2493  if zip == 2193 & cityLower == "weston" // typo
replace zip = 19601 if zip == 19603 & cityLower == "westreading" // P.O
replace zip = 23185 if zip == 23187 & cityLower == "williamsburg" // P.O
replace zip = 92886 if zip == 92686 & cityLower == "yorbalinda" // typo
replace zip = 92887 if zip == 92687 & cityLower == "yorbalinda" // typo
replace zip = 8873  if zip == 8890 & cityLower == "zarephath" // P.O
replace zip = 33540 if zip == 33539 & cityLower == "zephyrhills" // P.O
* Zip fixes, continued: each rule is keyed on the normalised city name and on
* the recorded zip(s). Tags: "P.O" = P.O.-box-only zip, "typo", "?" = not sure.
replace zip = 30303 if cityLower == "atlanta" & zip == 30335 // ?
replace zip = 78701 if cityLower == "austin" & zip == 78768 // P.O
replace zip = 79968 if cityLower == "elpaso" & zip == 79998 // typo
replace zip = 80120 if cityLower == "littleton" & zip == 80160 // P.O
replace zip = 92101 if cityLower == "sandiego" & inlist(zip, 92168, 92149) // P.O
replace zip = 30338 if cityLower == "atlanta" & zip == 30356 // P.O
replace zip = 78701 if cityLower == "austin" & inlist(zip, 78714, 78762) // P.O
replace zip = 71220 if cityLower == "bastrop" & zip == 71221 // P.O
replace zip = 77705 if cityLower == "beaumont" & zip == 77710 // P.O
replace zip = 58504 if cityLower == "bismarck" & inlist(zip, 58506, 58502) // P.O
replace zip = 34210 if cityLower == "bradenton" & inlist(zip, 33507, 33505) // P.O
replace zip = 91504 if cityLower == "burbank" & inlist(zip, 91510, 91515) // P.O
replace zip = 16002 if cityLower == "butler" & zip == 16003 // P.O
replace zip = 2139 if cityLower == "cambridge" & zip == 2238 // P.O
replace zip = 52404 if cityLower == "cedarrapids" & zip == 52406 // P.O
replace zip = 27514 if cityLower == "chapelhill" & zip == 27515 // P.O
replace zip = 29406 if cityLower == "charleston" & inlist(zip, 29423, 29411) // P.O
replace zip = 28204 if cityLower == "charlotte" & zip == 28235 // P.O
replace zip = 60607 if cityLower == "chicago" & zip == 60680 // P.O
replace zip = 45219 if cityLower == "cincinnati" & zip == 45221 // P.O
replace zip = 91746 if cityLower == "cityofindustry" & zip == 91716 // typo
* NOTE(review): this rule used to test zip == 33520 twice; a second, different
* zip may have been intended - confirm against the zipcode file.
replace zip = 33760 if cityLower == "clearwater" & zip == 33520 // P.O
replace zip = 80918 if cityLower == "coloradosprings" & zip == 80933 // P.O
replace zip = 31901 if cityLower == "columbus" & zip == 31993 // P.O
replace zip = 38501 if cityLower == "cookeville" & zip == 38505 // P.O
replace zip = 54701 if cityLower == "eauclaire" & zip == 54702 // P.O
replace zip = 8837 if cityLower == "edison" & zip == 8818 // P.O
replace zip = 62025 if cityLower == "edwardsville" & zip == 62026 // P.O
replace zip = 27909 if cityLower == "elizabethcity" & zip == 27906 // P.O
* Zip fixes, continued, written as a table. Each entry reads
*     "<recorded zip> <replacement zip> <normalised city>"
* and is applied, in the order listed, as
*     replace zip = <replacement> if zip == <recorded> & cityLower == "<city>"
foreach zfix in ///
	"86005 86001 flagstaff" ///
	"54936 54935 fonddulac" ///
	"33906 33919 fortmyers" ///
	"72913 72904 fortsmith" ///
	"93740 93710 fresno" ///
	"93741 93710 fresno" ///
	"92632 92832 fullerton" ///
	"35999 35901 gadsden" ///
	"30503 30501 gainesville" ///
	"77553 77554 galveston" ///
	"81602 81601 glenwoodsprings" ///
	"80632 80634 greeley" ///
	"54307 54303 greenbay" ///
	"27402 27412 greensboro" ///
	"27835 27834 greenville" ///
	"70404 70401 hammond" ///
	"59620 59601 helena" ///
	"70361 70360 houma" ///
	"77270 77002 houston" ///
	"77225 77030 houston" ///
	"65102 65101 jeffersoncity" ///
	"66160 66103 kansascity" ///
	"53141 53144 kenosha" ///
	"37933 37932 knoxville" ///
	"46904 46902 kokomo" ///
	"54602 54601 lacrosse" ///
	"47903 47905 lafayette" ///
	"70502 70501 lafayette" ///
	"70609 70605 lakecharles" ///
	"70616 70615 lakecharles" ///
	"48901 48933 lansing" ///
	"91749 91746 lapuente" ///
	"40506 40508 lexington" ///
	"67905 67901 liberal" ///
	"53708 53706 madison" ///
	"30061 30060 marietta" ///
	"78502 78501 mcallen" ///
	"38163 38103 memphis" ///
	"38174 38103 memphis" {
	* split the entry into its three fields
	local zfix_old  : word 1 of `zfix'
	local zfix_new  : word 2 of `zfix'
	local zfix_city : word 3 of `zfix'
	replace zip = `zfix_new' if zip == `zfix_old' & cityLower == "`zfix_city'"
}
replace zip = 33174 if zip == 33199  & cityLower == "miami"
replace zip = 53221 if zip == 53201  & cityLower == "milwaukee"
replace zip = 95358 if zip == 95352  & cityLower == "modesto"
replace zip = 36104 if zip == 36101  & cityLower == "montgomery"
replace zip = 36104 if zip == 36103  & cityLower == "montgomery"
replace zip = 75961 if zip == 75962  & cityLower == "nacogdoches"
replace zip = 71457 if zip == 71458  & cityLower == "natchitoches"
replace zip = 8901 if zip == 8903  & cityLower == "newbrunswick"
replace zip = 70122 if zip == 70148  & cityLower == "neworleans"
replace zip = 32811 if zip == 32802  & cityLower == "orlando"
replace zip = 32401 if zip == 32402  & cityLower == "panamacity"
replace zip = 85004 if zip == 85002  & cityLower == "phoenix"
replace zip = 85306 if zip == 85069  & cityLower == "phoenix"
replace zip = 97201 if zip == 97207  & cityLower == "portland"
replace zip = 27607 if zip == 27695  & cityLower == "raleigh"
replace zip = 27601 if zip == 27611  & cityLower == "raleigh"
replace zip = 92507 if zip == 92521  & cityLower == "riverside"
replace zip = 30161 if zip == 30162  & cityLower == "rome"
replace zip = 88203 if zip == 88202  & cityLower == "roswell"
replace zip = 71270 if (zip == 71272 | zip == 71273) & cityLower == "ruston"
replace zip = 28146 if zip == 28145  & cityLower == "salisbury"
replace zip = 84123 if zip == 84130  & cityLower == "saltlakecity"
replace zip = 93405 if zip == 93403  & cityLower == "sanluisobispo"
replace zip = 92069 if zip == 92096  & cityLower == "sanmarcos"
replace zip = 94903 if zip == 94913  & cityLower == "sanrafael"
replace zip = 71107 if zip == 71137  & cityLower == "shreveport"
replace zip = 51106 if zip == 51102  & cityLower == "siouxcity"
replace zip = 70458 if zip == 70459  & cityLower == "slidell"
replace zip = 46615 if zip == 46634  & cityLower == "southbend"
replace zip = 6905 if zip == 6904  & cityLower == "stamford"
replace zip = 95207 if zip == 95201  & cityLower == "stockton"
replace zip = 33710 if zip == 33733  & cityLower == "stpetersburg"
replace zip = 75501 if zip == 75505  & cityLower == "texarkana"
replace zip = 43614 if zip == 43699  & cityLower == "toledo"
replace zip = 8753 if zip == 8754  & cityLower == "tomsriver"
replace zip = 8609 if zip == 8607  & cityLower == "trenton"
replace zip = 8650 if zip == 8625  & cityLower == "trenton"
replace zip = 35401 if zip == 35487  & cityLower == "tuscaloosa"
replace zip = 75789 if zip == 75711  & cityLower == "tyler"
replace zip = 91401 if zip == 91417  & cityLower == "vannuys"
replace zip = 71292 if zip == 71294  & cityLower == "westmonroe"
replace zip = 27127 if zip == 27117  & cityLower == "winston-salem"
replace zip = 98902 if zip == 98907  & cityLower == "yakima"
replace zip = 85365 if zip == 85366  & cityLower == "yuma"
replace zip = 33544 if zip == 34641  & cityLower == "largo"
replace zip = 70806 if zip == 70898  & cityLower == "batonrouge"
replace zip = 25801 if zip == 25802  & cityLower == "beckley"

* Second run of ZIP corrections keyed on (reported zip, cityLower).
* Each line rewrites one reported ZIP to another ZIP for rows in the named city
* (or for a specific unitid where two institutions share a reported ZIP).
* Statements run top to bottom, so a later line sees the result of earlier rewrites.
replace zip = 62220 if zip == 62222 & cityLower == "belleville"
replace zip = 80302 if zip == 80308 & cityLower == "boulder"
replace zip = 6604 if zip ==  6601 & cityLower == "bridgeport"
replace zip = 28204 if zip == 28233 & cityLower == "charlotte"
replace zip = 45202 if zip == 45210 & cityLower == "cincinnati"
replace zip = 65201 if (zip == 65216 | zip == 65215) & cityLower == "columbia"
replace zip = 29203 if zip ==  29230 & cityLower == "columbia"
replace zip = 31904 if zip == 31994 & cityLower == "columbus"
replace zip = 75224 if zip == 75376 & cityLower == "dallas"
replace zip = 75205 if zip == 75275 & cityLower == "dallas"
replace zip = 95616 if zip == 95617 & cityLower == "davis"
replace zip = 10038 if zip == 10048 & cityLower == "newyork"
replace zip = 30533 if zip == 30597 & cityLower == "dahlonega"
replace zip = 45435 if zip == 45488 & cityLower == "dayton"
* NOTE(review): "snluisobispo" presumably matches a misspelling present in the raw city field -- confirm
replace zip = 93405 if zip == 93403 & cityLower == "snluisobispo"
replace zip = 95688 if zip == 95696 & cityLower == "vacaville"
replace zip = 24019 if zip == 24020 & cityLower == "roanoke" 
* NOTE(review): "agouar" presumably matches a misspelling present in the raw city field -- confirm
replace zip = 91301 if zip == 91376 & cityLower == "agouar" 
replace zip = 79109 if zip == 79178 & cityLower == "amarillo" 
* The target used to be written 6010, a Connecticut ZIP, while the source 76005
* is a Texas (Arlington) ZIP; the leading 7 had been dropped. 76010 keeps the
* institution in Arlington, TX, like every other same-city correction in this list.
replace zip = 76010 if zip == 76005 & cityLower == "arlington"
replace zip = 40403 if zip == 40404 & cityLower == "berea"
replace zip = 49103 if zip == 49104 & cityLower == "berriensprings"
* no city filter on the next line: both source ZIPs are rewritten wherever they appear
replace zip = 02465 if zip == 2466 | zip == 2162 // auburndale/newton
* two institutions report 48710; each is sent to its own ZIP by unitid
replace zip = 48604 if zip == 48710 & unitid == 172051 // Saginaw Valley State University
replace zip = 48706 if zip == 48710 & unitid == 169521 // delta college
replace zip = 95112 if zip == 95192 & cityLower == "sanjose"

* Third run of ZIP corrections keyed on (reported zip, cityLower), plus one unitid-keyed line.
* Inline tags are the author's stated reason ("P.O", "typo", "error"); untagged lines give no reason.
* Statements run top to bottom, so a later line sees the result of earlier rewrites.
replace zip = 6511 if zip == 6520 & cityLower == "newhaven"
replace zip = 6708 if zip == 6720 & cityLower == "waterbury"
replace zip = 7017 if zip == 7019 & cityLower == "eastorange"
replace zip = 7050 if zip == 7051 & cityLower == "orange"
replace zip = 7753 if zip == 7754 & cityLower == "neptune"
replace zip = 13502 if zip == 13503 & cityLower == "utica"
replace zip = 16504 if zip == 16512 & cityLower == "erie"
replace zip = 17403 if zip == 17405 & cityLower == "york"
replace zip = 19801 if zip == 19899 & cityLower == "wilmington"
* the next line is subsumed by the one after it (same target, 32523 is listed again there)
replace zip = 32503 if zip == 32523 & cityLower == "pensacola"
replace zip = 32503 if (zip == 32523 | zip == 32516) & cityLower == "pensacola"
replace zip = 32801 if zip == 32858 & cityLower == "orlando"
replace zip = 33755 if zip == 33758 & cityLower == "clearwater"
replace zip = 35805 if zip == 35807 & cityLower == "huntsville"
replace zip = 38118 if zip == 38181 & cityLower == "memphis"
replace zip = 38134 if zip == 38184 & cityLower == "memphis"
replace zip = 40206 if zip == 40280 & cityLower == "louisville"
replace zip = 48823 if zip == 48826 & cityLower == "eastlansing"
replace zip = 53073 if zip == 53082 & cityLower == "sheboygan"
replace zip = 54303 if zip == 54305 & cityLower == "greenbay"
replace zip = 57105 if zip == 57117 & cityLower == "siouxfalls"
replace zip = 57701 if zip == 57709 & cityLower == "rapidcity"
replace zip = 59405 if zip == 59403 & cityLower == "greatfalls"
replace zip = 70501 if zip == 70505 & cityLower == "lafayette"
replace zip = 72201 if zip == 72215 & cityLower == "littlerock"
replace zip = 72923 if zip == 72917 & cityLower == "fortsmith"
replace zip = 73013 if zip == 73136 & cityLower == "oklahomacity"
replace zip = 73501 if zip == 73502 & cityLower == "lawton"
replace zip = 73701 if zip == 73702 & cityLower == "enid"
replace zip = 74804 if zip == 74802 & cityLower == "shawnee"
replace zip = 84606 if zip == 84603 & cityLower == "provo"
replace zip = 90292 if zip == 90309 & cityLower == "inglewood"
replace zip = 90301 if zip == 90306 & cityLower == "inglewood"
replace zip = 90401 if zip == 90407 & cityLower == "santamonica"
replace zip = 92410 if zip == 92414 & cityLower == "sanbernardino"
replace zip = 92505 if zip == 92515 & cityLower == "riverside"
replace zip = 84118 if zip == 84129 & cityLower == "taylorsville"
replace zip = 92831 if zip == 92633 & cityLower == "fullerton"
replace zip = 93702 if zip == 93715 & cityLower == "fresno"
replace zip = 95050 if zip == 95053 & cityLower == "santaclara"
replace zip = 94612 if zip == 96412 & cityLower == "oakland"
replace zip = 98033 if zip == 98083 & cityLower == "kirkland"
replace zip = 67357 if zip == 67657 & cityLower == "parsons" // typo
replace zip = 49829 if zip == 49289 & cityLower == "escanaba" // typo
replace zip = 42101 if zip == 42102 & cityLower == "bowlinggreen" // P.O
replace zip = 43701 if zip == 43201 & cityLower == "zanesville" // typo
replace zip = 94607 if zip == 90043 & cityLower == "oakland" // error
replace zip = 1982 if zip == 3833 & cityLower == "southhamilton" // error
replace zip = 10027 if zip == 7628 & cityLower == "newyork" // error
replace zip = 27262 if zip == 27268 & cityLower == "highpoint" // typo
replace zip = 27105 if zip == 27102 & cityLower == "winstonsalem" // P.O
replace zip = 86301 if zip == 86302 & cityLower == "prescot" // P.O
replace zip = 32065 if zip == 32067 & cityLower == "orangepark" // typo
replace zip = 34772 if zip == 34742 & cityLower == "kissimmee" // typo
* keyed on unitid rather than city: two reported ZIPs for one institution
replace zip = 85258 if (zip == 85067 | zip == 85061) & unitid == 105385 // P.O
replace zip = 28052 if zip == 28053 & cityLower == "gastonia" // P.O
replace zip = 63043 if zip == 64043 & cityLower == "marylandheight" // typo

*replace zip =  if zip ==  & cityLower == "" // typo


* movers - public
* Institution-specific fixes: each line pins one unitid's stray ZIP to the ZIP used for it.
replace zip = 36867 if inlist(unitid, 101028, 102331) & zip == 36869 // the institution relocated but the campus stayed where it was
replace zip = 95608 if unitid == 123378 & zip == 80301 // cause unknown
replace zip = 81231 if unitid == 128391 & zip == 81230 // likely a ZIP code change over time
replace zip = 55734 if unitid == 173531 & zip == 55746 // differs in one year only, likely a reporting mistake
replace zip = 28645 if unitid == 198118 & zip == 28638 // campuses in Boone and in Hudson; Lenoir (also a campus) lies between them
replace zip = 58763 if unitid == 200086 & zip == 58554 // typo
replace zip = 44691 if unitid == 204662 & zip == 43210 // Ohio State University, separate campus
replace zip = 44906 if unitid == 204680 & zip == 43210 // Ohio State University, separate campus
replace zip = 43302 if unitid == 204699 & zip == 43210 // Ohio State University, separate campus
replace zip = 45804 if unitid == 204671 & zip == 43210 // Ohio State University, separate campus
replace zip = 44846 if unitid == 205346 & zip == 44848 // typo
replace zip = 77340 if unitid == 227881 & zip == 77341 // likely a ZIP code change over time
replace zip = 42303 if unitid == 247940 & zip == 42301 // likely a ZIP code change over time
replace zip = 44232 if unitid == 364636 & zip == 44216 // cause unknown
replace zip = 17601 if unitid == 369172 & zip == 17110 // 17110 is probably the central office; the campus is in Lancaster
replace zip = 17042 if unitid == 369181 & zip == 17110 // 17110 is probably the central office; the campus is in Lebanon
replace zip = 72401 if unitid == 448336 & zip == 72201 // cause unknown

* movers - non-profit
* Institution-specific fixes: each line pins one unitid's stray ZIP to the ZIP used for it.
replace zip = 92025 if unitid == 111692 & inlist(zip, 95122, 95110) // later years report the system office instead
replace zip = 94303 if unitid == 120698 & zip == 94063 // typo
replace zip = 7092 if unitid == 146630 & zip == 60022 // cause unknown
replace zip = 41339 if unitid == 157030 & zip == 41385 // the campus moved only about 3 miles, enough to change czone
replace zip = 4084 if unitid == 161518 & zip == 4062 // likely a ZIP code change over time
replace zip = 28036 if unitid == 198385 & zip == 28035 // right on the border: one building inside, one outside
replace zip = 28601 if unitid == 198835 & zip == 28603 // a few meters make a big difference here
replace zip = 58504 if unitid == 200554 & zip == 58501 // likely a ZIP code change over time
replace zip = 37804 if unitid == 220710 & zip == 37801 // likely a ZIP code change over time
replace zip = 37383 if unitid == 221519 & zip == 37375 // likely a ZIP code change over time
replace zip = 37404 if unitid == 221856 & zip == 27101 // typo
replace zip = 27265 if unitid == 198747 & zip == 27260 // likely a ZIP code change over time
replace zip = 12507 if unitid == 246789 & zip == 12571 // typo
replace zip = 95827 if unitid == 383774 & zip == 95356 // cause unknown
replace zip = 97305 if unitid == 404329 & zip == 97021 // cause unknown
replace zip = 37312 if unitid == 404718 & zip == 37311 // likely a ZIP code change over time

* Flag which ZIPs exist in the ZIP-county crosswalk. duplicates.dta holds one row per
* crosswalk zip together with dup, so after this merge dup is missing exactly on the rows
* whose zip was not found there; keep(1 3) keeps every row already in memory.
merge m:1 zip using "$clean_data_education/duplicates.dta", keep(1 3) nogenerate

* copy zip within the same city-university:
* for unmatched rows, borrow the most frequent matched zip of the same unitid-cityLower-fips
sort unitid year cityLower zip
gen zip2 = zip
	replace zip2 = . if dup == . // only matched ZIPs take part in the mode
egen zipMode= mode(zip2), by(unitid cityLower fips) maxmode // maxmode: ties go to the highest zip
	replace zip = zipMode if mi(dup) & !mi(zipMode)
	drop zip2 // zipMode itself stays in the dataset
	
* copy zip within the same city (less restrictive):
* an unmatched row takes the zip of the neighbouring row (first the previous one, then the
* next one) when that neighbour is in the same cityLower and fips and its zip was matched.
* dup is not refreshed between the two passes, so only originally matched rows act as donors.
* NOTE(review): an earlier comment said this applies only to for-profit, or non-profit before
* 1993; nothing in these lines restricts by control or year -- confirm intent.
sort cityLower year unitid  
by cityLower: replace zip = zip[_n-1] if mi(dup) & !mi(dup[_n-1]) & fips[_n]==fips[_n-1]
by cityLower: replace zip = zip[_n+1] if mi(dup) & !mi(dup[_n+1]) & fips[_n]==fips[_n+1]
drop dup

* re-check the repaired ZIPs against the crosswalk and display how many are still unmatched
merge m:1 zip using "$clean_data_education/duplicates.dta", keep(1 3) nogenerate
count if dup == . 

*** attach commuting zones
* joinby forms every zip x czone pairing found in ZIP_CZ.dta, so an institution-year whose
* zip spans several czones becomes several rows
joinby zip using "$xwalk_geo/ZIP_CZ.dta"

* inspection aid: dup2 is 0 for a unitid-year with a single row, otherwise 1, 2, ... within it
sort unitid year
quietly by unitid year: generate dup2 = cond(_N > 1, _n, 0)
order unitid year zip czone ourRatio dup2
sort unitid year zip
* (visual check of the join: looked fine)

save "$clean_data_education/locationsYearly.dta", replace

* add non mainland universities 
* NOTE(review): the appended file is built from unitid/year/fips at the top of this do-file,
* so the appended rows presumably carry no czone, ourRatio or dup2 -- confirm.
append using "$clean_data_education/unitid_fips.dta"
	replace nonMainland = 0 if mi(nonMainland) // rows without the flag get 0
	
* deal with movers
* year group: pool survey years into windows labelled by a reference year;
* a year that falls in no window keeps yearGroup == 0 and is dropped
gen yearGroup = 0
	replace yearGroup = 1990 if year <= 1991 
	replace yearGroup = 1994 if year >= 1992 & year <= 1994
	replace yearGroup = 2000 if year >= 1999 & year <= 2001
	replace yearGroup = 2007 if year >= 2006 & year <= 2008
	replace yearGroup = 2014 if year >= 2013 // NOTE(review): a missing year would also satisfy >= 2013
	drop if yearGroup == 0 // for integer years: 1995-1998, 2002-2005 and 2009-2012
	
* multiple czones in a given year.
* Stata treats a missing value as larger than any number, so dup2 > 0 alone is true on rows
* where dup2 is missing (rows that never went through the czone join); exclude them explicitly.
gen multiple = (dup2 > 0 & !mi(dup2))
egen czone2 = mean(czone), by(unitid year) // this way also obs with multiple czones will have a unique identifier
	replace czone2=round(czone2, 1) // since there might be small differences due to rounding
egen media = mean(czone2), by(unitid yearGroup) // average identifier over the years of the window
	replace media=round(media, 1) // since there might be small differences due to rounding
gen moved = (media != czone2 & year > 1992) // one when the unitid's czone identifier differs from its window average (i.e. it moved within the window)
egen mover = max(moved), by(unitid) // unitid that moved at least once
	
*br unitid year zip zipOriginal czone czone2 media moved cityLower instnm if moved2 == 1 & control == 2 
* NB. Some universities still change location. Those are for-profit institutions; these changes will be
* handled either by the collapse (last) below, when we take the 3-year average, or by drops that I'll do in the final cleaning
	
**********************************************************
* Run this whole section in one go.
* Step 1: stash every variable's label in a local macro (the variable name when it has none).
* Step 2: collapse to one row per unitid-czone-yearGroup, keeping the last value of each variable.
* Step 3: put the stashed labels back on the variables that remain.
foreach vname of varlist _all {
	local s`vname' : variable label `vname'
	if `"`s`vname''"' == "" local s`vname' "`vname'"
}

collapse (last) zip fips statefip city region division name_czone ///
	instnm zipOriginal ourRatio nonMainland mover, ///
	by(unitid czone yearGroup)

foreach vname of varlist _all {
	label variable `vname' "`s`vname''"
}
**********************************************************

* Count, per institution, how many year groups after 1990 are available.
* dup numbers the rows of a unitid-yearGroup that has several czones (0 when there is one row),
* so dup < 2 keeps one row per unitid-yearGroup and each year group is counted once.
quietly bysort unitid yearGroup:  gen dup = cond(_N==1,0,_n)
* The collapse above keeps yearGroup but not year, so the variable is spelled out in full;
* writing "year" here only worked through Stata's variable-name abbreviation and fails
* with -set varabbrev off-.
egen period = count(unitid) if dup < 2 & yearGroup != 1990, by(unitid)
egen periods = max(period), by(unitid) // spread the count to every row of the institution
	replace periods = 0 if mi(periods) // institutions observed only in the 1990 group
	label variable periods "# of yearGroup available post-90"
	drop dup period

order unitid yearGroup czone ourRatio mover periods
sort unitid yearGroup

* write the final institution-by-yearGroup location file
save "$clean_data_education/locations.dta", replace

* delete the two helper files that were merged/appended above
erase "$clean_data_education/duplicates.dta"
erase "$clean_data_education/unitid_fips.dta"

* recall that the missing values of ourRatio come from the observations that have nonMainland == 1

